sample_data(ps.b.r)$Site.name = factor(sample_data(ps.b.r)$Site.name,
levels = c("Blaenavon","Cefn Hengoed","Mountain Gate","Taff Bargoed",
"Glyncastle","Dinas","Ynysarwed","Celynen North","Morlais",
"Lindsay","Six Bells","Crumlin Navigation","Taffs Well"))
metadata<-sample_data(ps.b.r)
#Temp
temp_table.plot<-ggplot(metadata,
aes(x=Site.name, y=Temp.C, fill=Month)) +
geom_point(aes(fill=Month, color=Month),
size=2,
position = position_dodge(width=0.5)) +
geom_hline(yintercept = 13.4,
color="dark red",
linetype="dashed") +
geom_hline(yintercept = 11,
color="gray",
linetype="dashed") +
theme(axis.title.x=element_blank(),
axis.title.y = element_text(size=14),
axis.text.x = element_blank(),
axis.text.y = element_text(size=12),
legend.position = "bottom",
legend.title = element_text(face = "bold"),
legend.background = element_rect(fill="gray95", size=.5, linetype="dotted")) +
annotate("text", "Mountain Gate", 14, vjust = -1, hjust=.45, size= 4,
label = "Average mine water temperature (Farr et al, 2016)") +
labs(y=expression(paste("Temperature (",degree,"C)")))
#DO
do.plot<-ggplot(metadata,
aes(x=Site.name, y=`DO..sat`, fill=Month)) +
geom_point(aes(fill=Month, color=Month),
size=2,
position = position_dodge(width=0.5)) +
geom_line(aes(group=Month, color=Month),
stat="smooth", method="loess",
alpha = 0.4, size=1.5) +
geom_hline(yintercept = 50,
color="gray60",
linetype="dashed") +
theme(axis.title.x=element_blank(),
axis.title.y = element_text(size=14),
axis.text.x = element_blank(),
axis.text.y = element_text(size=12),
legend.position = "none") +
labs(y="Dissolved Oxygen (%)")
#Sulfate
so4.plot<-ggplot(metadata,
aes(x=Site.name, y=`SO4..µg.ml`, fill=Month)) +
geom_point(aes(fill=Month, color=Month),
size=2,
position = position_dodge(width=0.5)) +
geom_line(aes(group=Month, color=Month),
stat="smooth", method="loess",
alpha = 0.4, size=1.5) +
geom_hline(yintercept = 500,
color="gray60",
linetype="dashed") +
theme(axis.title.x=element_blank(),
axis.title.y = element_text(size=14),
axis.text.x = element_blank(),
axis.text.y = element_text(size=12),
legend.position = "none") +
labs(y=expression(paste("SO"[4]^{-2},"(",mu,"g.mL"^{-1},")"))) +
guides(size=FALSE)
#pH
pH.plot<-ggplot(metadata,
aes(x=Site.name, y=`pH`, fill=Month)) +
geom_point(aes(fill=Month, color=Month),
size=2,
position = position_dodge(width=0.5)) +
geom_line(aes(group=Month, color=Month),
stat="smooth", method="loess",
alpha = 0.4, size=1.5) +
geom_hline(yintercept = 7,
color="gray60",
linetype="dashed") +
theme(axis.title.x=element_blank(),
axis.title.y = element_text(size=14),
axis.text.x = element_blank(),
axis.text.y = element_text(size=12),
legend.position = "none") +
labs(y="pH") +
guides(size=FALSE)
#EC
ec.plot<-ggplot(metadata,
aes(x=Site.name, y=`EC.uS.cm`, fill=Month)) +
geom_point(aes(fill=Month, color=Month),
size=2,
position = position_dodge(width=0.5)) +
geom_line(aes(group=Month, color=Month),
stat="smooth", method="loess",
alpha = 0.4, size=1.5) +
geom_hline(yintercept = 2000,
color="gray60",
linetype="dashed") +
geom_hline(yintercept = 1000,
color="grey80",
linetype="dashed") +
scale_y_continuous(breaks=c(0,500,1000,1500,2000,2500,3000)) +
theme(axis.title.x=element_blank(),
axis.title.y = element_text(size=14),
axis.text.x = element_text(angle = 25, hjust = 1, size= 14),
axis.text.y = element_text(size=12),
legend.position = "top",
legend.title = element_text(face = "bold"),
legend.background = element_rect(fill="gray95", size=.5, linetype="dotted")) +
labs(y=expression(paste("EC (",mu,"S.cm"^{-1},")")))
plot_grid(temp_table.plot, do.plot, so4.plot, pH.plot, ec.plot, ncol = 1, align='v', rel_heights = c(1.75,1,1,1,1.75))
And does sulfate ‘more or less’ negatively correlate with DO just to make sure?
do.so4.plot<-ggplot(metadata,
aes(x=Site.name)) +
geom_point(aes(y=`DO..sat`,
fill="Dissolved Oxygen", color="Dissolved Oxygen",
shape=Month),
size=2,
position = position_dodge(width=0.5)) +
geom_point(aes(y=`SO4..µg.ml`/12,
fill="Sulfate", color="Sulfate",
shape=Month),
size=2,
position = position_dodge(width=0.5)) +
geom_line(aes(y=`DO..sat`,
group=Month,
color="Dissolved Oxygen"),
stat="smooth", method="loess",
alpha = 0.4, size=1.5) +
geom_line(aes(y=`SO4..µg.ml`/12,
group=Month,
color="Sulfate"),
stat="smooth", method="loess",
alpha = 0.4, size=1.5) +
scale_colour_manual(values = c("dodgerblue1", "firebrick2")) +
scale_y_continuous(sec.axis = sec_axis(~.*12, name = expression(paste("SO"[4]^{-2},"(",mu,"g.mL"^{-1},")")))) +
labs(y="Dissolved Oxygen (%)",
colour = "Parameter") +
theme(axis.title.x=element_blank(),
axis.title.y = element_text(size=14),
axis.text.x = element_text(angle=35, hjust = 1,vjust = 1),
axis.text.y = element_text(size=12),
legend.position = "top",
legend.justification = "center",
legend.text = element_text(size = 12),
legend.title = element_text(size = 13, face = "bold"),
legend.direction = "vertical") +
guides(fill = FALSE)
do.so4.plot
Yep-ish.
But how does the overal picture look regarding how variables relate to one another?
#remove all columns with one or more NA entry
#NA in DO substituted to the mean of the two values in the site
sgg_metadata<-as(sample_data(ps.b.r),"data.frame")
sgg_metadata$DO..sat[is.na(sgg_metadata$DO..sat)] <- 44.1
sgg_metadata_noNA = sgg_metadata[ , colSums(is.na(sgg_metadata)) == 0]
colnames(sgg_metadata_noNA) <- c("Sample_Code","Sample.ID","Sample_type","Site.ID","Site.name","Time_point","Month","Batch","Date", "Time",
"Temperature","EC","DO","K","Na","Cl","SO4","6_Li","7_Li","7_Li_He","B","Sc","Mn","Co","Ni","Ga","As","Se",
"Rb","Sr","Y","Te","Cs","Ba","La","Nd","Eu","Er","Lu","U")
sgg_metadata_noNA_april<-subset(sgg_metadata_noNA,
Month %in% c("April"))
sgg_metadata_noNA_august<-subset(sgg_metadata_noNA,
Month %in% c("August"))
sgg_metadata_noNA_december<-subset(sgg_metadata_noNA,
Month %in% c("December"))
cols_to_go_april <- names(sgg_metadata_noNA_april) %in% c("Sample_Code", "Sample.ID", "Sample_type","Site.ID",
"Site.name","Time_point","Month","Batch",
"Date","Time")
cols_to_go_august <- names(sgg_metadata_noNA_august) %in% c("Sample_Code", "Sample.ID", "Sample_type","Site.ID",
"Site.name","Time_point","Month","Batch",
"Date","Time")
cols_to_go_december <- names(sgg_metadata_noNA_december) %in% c("Sample_Code", "Sample.ID", "Sample_type","Site.ID",
"Site.name","Time_point","Month","Batch",
"Date","Time")
sgg_metadata_noNA_april_f <- sgg_metadata_noNA_april[!cols_to_go_april]
sgg_metadata_noNA_august_f <- sgg_metadata_noNA_august[!cols_to_go_august]
sgg_metadata_noNA_december_f <- sgg_metadata_noNA_december[!cols_to_go_december]
april_corr<-ggcorr(sgg_metadata_noNA_april_f, method = c("all.obs", "spearman"),
palette = "RdBu",
nbreaks = 10, hjust = 1,
size=4, layout.exp = 6) +
ggtitle("April")
august_corr<-ggcorr(sgg_metadata_noNA_august_f, method = c("all.obs", "spearman"),
palette = "RdBu",
nbreaks = 10, hjust = 1,
size=4, layout.exp = 6) +
ggtitle("August")
december_corr<-ggcorr(sgg_metadata_noNA_december_f, method = c("all.obs", "spearman"),
palette = "RdBu",
nbreaks = 10, hjust = 1,
size=4, layout.exp = 6) +
ggtitle("December")
Pearson assumes linearity between variables as well as normal distributions of data and is sensible to transformation. Spearman is a nonparametric measure of rank correlation as Kendall is, meaning they don’t care about transformations. Kendall meaures strength of dependence between two variables. Spearman does not make assumptions of data distribution and measures degree of association between two variables.
There’s clearly a lot of variation, but definitely there’s loads more to learn on how these variables relate.
iron_theme=theme(
axis.title.x=element_blank(),
axis.text.x = element_text(angle = 25, hjust = 1, vjust = 1, size= 12),
axis.text.y = element_text(size=8),
axis.title.y = element_text(angle = 0, size=10),
legend.title = element_text(size=8, face = "bold"))
KONEFE_Fe_mgL_plot<-ggplot(swales_ca_metadata_2016,
aes(x=Site, y=`KONEFE_Fe_mgL`, fill=Month)) +
geom_point(aes(fill=Month, color=Month),
size=2,
position = position_dodge(width=0.5)) +
guides(fill = guide_legend(nrow = 1),
color = guide_legend(nrow = 1)) +
labs(y=expression(atop("KONEFE Iron as Fe",paste("(mg.L"^{-1},")")))) +
iron_theme +
theme(legend.position = "bottom",
legend.justification = "left",
legend.text=element_text(size=8),
axis.text.x = element_blank())
D_Fe_mgL_plot<-ggplot(swales_ca_metadata_2016,
aes(x=Site, y=`D_Fe_mgL`, fill=Month)) +
geom_point(aes(fill=Month, color=Month),
size=2,
position = position_dodge(width=0.5)) +
guides(fill = guide_legend(nrow = 1),
color = guide_legend(nrow = 1)) +
labs(y=expression(atop("Iron as Fe (Dissolved)", paste("(mg.L"^{-1},")")))) +
iron_theme +
theme(legend.position = "none",
axis.text.x = element_blank())
ICPWATVAR_Fe_mgL_plot<-ggplot(swales_ca_metadata_2016,
aes(x=Site, y=`ICPWATVAR_Fe_mgL`, fill=Month)) +
geom_point(aes(fill=Month, color=Month),
size=2,
position = position_dodge(width=0.5)) +
guides(fill = guide_legend(nrow = 1),
color = guide_legend(nrow = 1)) +
labs(y=expression(atop("Iron as Fe (Total) ICPWATVAR", paste("(mg.L"^{-1},")")))) +
iron_theme +
theme(legend.position = "bottom",
legend.justification = "left",
legend.text=element_text(size=8),
axis.text.x = element_blank())
Fe2_mgL_plot<-ggplot(swales_ca_metadata_2016,
aes(x=Site, y=`Fe2_mgL`, fill=Month)) +
geom_point(aes(fill=Month, color=Month),
size=2,
position = position_dodge(width=0.5)) +
guides(fill = guide_legend(nrow = 1),
color = guide_legend(nrow = 1)) +
labs(y=expression(atop("Ferrous Iron as Fe"^{2+""}, paste("(mg.L"^{-1},")")))) +
iron_theme +
theme(legend.position = "none")
plot_grid(KONEFE_Fe_mgL_plot, D_Fe_mgL_plot, ICPWATVAR_Fe_mgL_plot, Fe2_mgL_plot,
align = 'v', ncol = 1,
rel_heights = c(0.1,0.1,0.1,0.1))
So, in 2016, while most of the other sites were close to 0, it seems a LOT of iron was being detected in Ynysarwed. Are there taxonomical evidences of more iron users there? Not really as seen on SGG-GenTaxonomy.html.
An adonis test tests “how is variation attributed to different experimental” variables, using distance matrices. It analyses how variables can be sources of variation of distance matrices and depends on fitting linear models to distance matrices. AKA PERMANOVA.
Null hypothesis: tested metadata categories have the same centroid.
metadata<-as(sample_data(ps.b.r),"data.frame")
metadata$DO..sat[is.na(metadata$DO..sat)] <- 44.1
metadata_noNA = metadata[ , colSums(is.na(metadata)) == 0]
ps_sig <- phyloseq(otu_table(SGG_SVt, taxa_are_rows=FALSE),
sample_data(metadata_noNA),
tax_table(SGG_Tax))
sample_data(ps_sig)$Month = factor(sample_data(ps_sig)$Month,
levels = c("April","August","December","Control"))
ps_sig_ntf = subset_samples(ps_sig, Site.name != "Taffs Well PUMPED")
ps_sig_ntf_nc = subset_samples(ps_sig_ntf, Sample_type != "Control")
ps_sig_ntf_nc_noab = subset_taxa(ps_sig_ntf_nc, Kingdom %in% c("Archaea", "Bacteria"))
ps_cap_r = transform_sample_counts(ps_sig_ntf_nc_noab, function(x) x/sum(x))
set.seed(1)
# Calculate Jensen-Shannon distance matrix
ps_cap_r_jsd <- phyloseq::distance(ps_cap_r, method = "jsd")
# make a data frame from the sample_data
sampledf_cap <- data.frame(sample_data(ps_cap_r))
adonis(ps_cap_r_jsd ~ Site.name + Temp.C + Month + EC.uS.cm + DO..sat + K.µg.ml + Na.µg.ml + Cl..µg.ml + SO4..µg.ml + X6.Li.ng.ml + X7.Li.ng.ml + X7.Li..He..ng.ml + X11.B.ng.ml + X45.Sc.ng.ml + X55.Mn.ng.ml + X59.Co.ng.ml + X60.Ni.ng.ml + X69.Ga.ng.ml + X75.As..He..ng.ml + X78.Se.ng.ml + X85.Rb.ng.ml + X88.Sr.ng.ml + X89.Y.ng.ml + X125.Te.ng.ml + X133.Cs.ng.ml + X135.Ba.ng.ml + X139.La.ng.ml + X146.Nd.ng.ml + X153.Eu.ng.ml + X166.Er.ng.ml + X175.Lu.ng.ml + X238.U.ng.ml,
data = sampledf_cap, strata = sampledf_cap$Time_point, permutations=1e3)
##
## Call:
## adonis(formula = ps_cap_r_jsd ~ Site.name + Temp.C + Month + EC.uS.cm + DO..sat + K.µg.ml + Na.µg.ml + Cl..µg.ml + SO4..µg.ml + X6.Li.ng.ml + X7.Li.ng.ml + X7.Li..He..ng.ml + X11.B.ng.ml + X45.Sc.ng.ml + X55.Mn.ng.ml + X59.Co.ng.ml + X60.Ni.ng.ml + X69.Ga.ng.ml + X75.As..He..ng.ml + X78.Se.ng.ml + X85.Rb.ng.ml + X88.Sr.ng.ml + X89.Y.ng.ml + X125.Te.ng.ml + X133.Cs.ng.ml + X135.Ba.ng.ml + X139.La.ng.ml + X146.Nd.ng.ml + X153.Eu.ng.ml + X166.Er.ng.ml + X175.Lu.ng.ml + X238.U.ng.ml, data = sampledf_cap, permutations = 1000, strata = sampledf_cap$Time_point)
##
## Blocks: strata
## Permutation: free
## Number of permutations: 1000
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Site.name 12 17.2135 1.43446 56.971 0.82991 0.000999 ***
## Temp.C 1 0.1026 0.10264 4.076 0.00495 0.000999 ***
## Month 2 0.1955 0.09774 3.882 0.00942 0.000999 ***
## EC.uS.cm 1 0.0528 0.05277 2.096 0.00254 0.020979 *
## DO..sat 1 0.0989 0.09888 3.927 0.00477 0.000999 ***
## K.µg.ml 1 0.0356 0.03561 1.414 0.00172 0.174825
## Na.µg.ml 1 0.0390 0.03897 1.548 0.00188 0.108891
## Cl..µg.ml 1 0.1391 0.13907 5.523 0.00670 0.000999 ***
## SO4..µg.ml 1 0.0638 0.06384 2.535 0.00308 0.011988 *
## X6.Li.ng.ml 1 0.0482 0.04822 1.915 0.00232 0.039960 *
## X7.Li.ng.ml 1 0.0404 0.04041 1.605 0.00195 0.087912 .
## X7.Li..He..ng.ml 1 0.0187 0.01868 0.742 0.00090 0.685315
## X11.B.ng.ml 1 0.0280 0.02796 1.111 0.00135 0.359640
## X45.Sc.ng.ml 1 0.0492 0.04917 1.953 0.00237 0.044955 *
## X55.Mn.ng.ml 1 0.0833 0.08326 3.307 0.00401 0.001998 **
## X59.Co.ng.ml 1 0.0429 0.04287 1.703 0.00207 0.086913 .
## X60.Ni.ng.ml 1 0.0468 0.04678 1.858 0.00226 0.056943 .
## X69.Ga.ng.ml 1 0.0192 0.01919 0.762 0.00093 0.689311
## X75.As..He..ng.ml 1 0.0325 0.03249 1.290 0.00157 0.223776
## X78.Se.ng.ml 1 0.0364 0.03640 1.446 0.00176 0.165834
## X85.Rb.ng.ml 1 0.0564 0.05639 2.240 0.00272 0.015984 *
## X88.Sr.ng.ml 1 0.0360 0.03599 1.429 0.00174 0.156843
## X89.Y.ng.ml 1 0.1430 0.14302 5.680 0.00690 0.000999 ***
## X125.Te.ng.ml 1 0.0204 0.02038 0.809 0.00098 0.642358
## X133.Cs.ng.ml 1 0.0140 0.01401 0.557 0.00068 0.874126
## X135.Ba.ng.ml 1 0.0406 0.04056 1.611 0.00196 0.100899
## X139.La.ng.ml 1 0.1084 0.10840 4.305 0.00523 0.000999 ***
## X146.Nd.ng.ml 1 0.0288 0.02881 1.144 0.00139 0.297702
## X153.Eu.ng.ml 1 0.0219 0.02194 0.871 0.00106 0.544456
## X166.Er.ng.ml 1 0.0188 0.01884 0.748 0.00091 0.710290
## X175.Lu.ng.ml 1 0.0171 0.01708 0.678 0.00082 0.727273
## X238.U.ng.ml 1 0.0369 0.03685 1.464 0.00178 0.125874
## Residuals 72 1.8129 0.02518 0.08740
## Total 116 20.7414 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Site ID significantly explains >80% of the variation. Does this change with a normalised metadata table?
norm_metadata<-read.csv("/media/andre/B2F8C9A0F8C962E9/SGG_16S_analysis/SGG_16S_metadata/SGG_metadata_norm.csv", header=TRUE)
#does this change with a normalised metadata table?
normalize <- function(x) {
return ((x - min(x)) / (max(x) - min(x)))
}
norm_metadata$Sample_Code<-as.numeric(norm_metadata$Sample_Code)
norm_metadata_s <- norm_metadata[order(norm_metadata$Sample_Code),]
norm_metadata_s$DO..sat[is.na(norm_metadata_s$DO..sat)] <- 44.1
sampledf_cap_norm <- as.data.frame(lapply(norm_metadata_s, normalize))
sampledf_cap_norm_noNA = sampledf_cap_norm[ , colSums(is.na(sampledf_cap_norm)) == 0]
#remove Sample_Code, substitute with originals
cols_to_go_sampledf_cap_norm_noNA <- names(sampledf_cap_norm_noNA) %in% c("Sample_Code")
sampledf_cap_norm_noNA_f <- sampledf_cap_norm_noNA[!cols_to_go_sampledf_cap_norm_noNA]
sampledf_cap_norm_noNA_f$Sample_Code <- norm_metadata_s$Sample_Code
sampledf_cap_norm_noNA_f <- sampledf_cap_norm_noNA_f %>% select(Sample_Code, everything())
order_jsd<-labels(ps_cap_r_jsd)
sampledf_cap_norm_noNA_f_ord<-sampledf_cap_norm_noNA_f[match(order_jsd, sampledf_cap_norm_noNA_f$Sample_Code),]
rownames(sampledf_cap_norm_noNA_f_ord) <- sampledf_cap_norm_noNA_f_ord$Sample_Code
norm_adonis = adonis(ps_cap_r_jsd ~ Site.ID + Temp.C + Month + EC.uS.cm + DO..sat + K.µg.ml + Na.µg.ml + Cl..µg.ml + SO4..µg.ml + X6.Li.ng.ml + X7.Li.ng.ml + X7.Li..He..ng.ml + X11.B.ng.ml + X45.Sc.ng.ml + X55.Mn.ng.ml + X59.Co.ng.ml + X60.Ni.ng.ml + X69.Ga.ng.ml + X75.As..He..ng.ml + X78.Se.ng.ml + X85.Rb.ng.ml + X88.Sr.ng.ml + X89.Y.ng.ml + X125.Te.ng.ml + X133.Cs.ng.ml + X135.Ba.ng.ml + X139.La.ng.ml + X146.Nd.ng.ml + X153.Eu.ng.ml + X166.Er.ng.ml + X175.Lu.ng.ml + X238.U.ng.ml,
data = sampledf_cap_norm_noNA_f_ord, permutations=1e3)
#Choose the 5 variables explaining the most variation
norm_adonis_df=as(norm_adonis$aov.tab, "data.frame")
head(norm_adonis_df[order(-norm_adonis_df$R2),], n=7)
## Permutation: free
## Number of permutations: 1000
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Total 116 20.7414 1.00000
## Residuals 84 2.7196 0.03238 0.13112
## Temp.C 1 2.3025 2.30254 71.119 0.11101 0.000999 ***
## EC.uS.cm 1 2.1577 2.15772 66.646 0.10403 0.000999 ***
## Site.ID 1 1.8927 1.89267 58.459 0.09125 0.000999 ***
## K.µg.ml 1 1.3659 1.36591 42.189 0.06585 0.000999 ***
## Na.µg.ml 1 1.3646 1.36462 42.149 0.06579 0.000999 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
It does! Only Temperature and EC significantly explain more than 10% of variation. Also, loads more variables seem to significantly correlate to microbial community structure!
Testing these two variables then, to understand if their adonis results are significant:
Homogeneity of dispersion test for significant categories:
Null hypothesis: tested metadata categories have the same dispersions (variance). betadisper calculates average distances of samples to group centroids (from metadata categories) permutest calculates ANOVAS on average distances from betadisper() to test if one or more groups is more variable than others
#Temperature
beta_t <- betadisper(ps_cap_r_jsd, sampledf_cap_norm_noNA_f_ord$Temp.C)
permutest(beta_t)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 38 1.1885 0.031276 1.8728 999 0.046 *
## Residuals 78 1.3026 0.016700
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#EC
beta_ec <- betadisper(ps_cap_r_jsd, sampledf_cap_norm_noNA_f_ord$EC.uS.cm)
permutest(beta_ec)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 105 0.309564 0.00294823 768.85 999 0.573
## Residuals 11 0.000042 0.00000383
This means group centroids for temperature and EC are not significantly shared, thus meaning that the adonis test was meaningful.
Using all significant variables for the formula, plotting only the ones explaining 5% or more variation.
#remove rows 73,74,75 (Taff's Well pumped samples) from norm_metadata and create new phyloseq object with normalised metadata
remove = c("73","74","75")
sampledf_cap_norm_noNA_f_ord_final <- sampledf_cap_norm_noNA_f_ord[!rownames(sampledf_cap_norm_noNA_f_ord) %in% remove, ]
sampledf_cap_norm_noNA_f_ord_final$Month_cat <- sampledf_cap$Month
sampledf_cap_norm_noNA_f_ord_final$Site.name <- sampledf_cap$Site.name
ps_cap_r_norm <- phyloseq(otu_table(ps_cap_r),
tax_table(ps_cap_r),
sample_data(sampledf_cap_norm_noNA_f_ord_final))
#CAP with significant normalised variables
CAP_ps_cap_r_norm <- ordinate(ps_cap_r_norm,
method = "CAP",
distance = "jsd",
k=3, trymax=1e3, weighted=TRUE,
formula = ~ Site.ID + Temp.C + EC.uS.cm + DO..sat + K.µg.ml + Na.µg.ml + Cl..µg.ml + SO4..µg.ml + X6.Li.ng.ml + X7.Li.ng.ml + X7.Li..He..ng.ml + X11.B.ng.ml + X45.Sc.ng.ml + X55.Mn.ng.ml + X59.Co.ng.ml + X60.Ni.ng.ml + X69.Ga.ng.ml + X75.As..He..ng.ml + X88.Sr.ng.ml + X89.Y.ng.ml + X133.Cs.ng.ml + X139.La.ng.ml + X238.U.ng.ml)
CAP_norm_sig_plot_a12 <- plot_ordination(ps_cap_r_norm,
ordination = CAP_ps_cap_r_norm, color = "Site.name", axes = c(1,2)) +
aes(shape = Month_cat) +
geom_point(aes(colour = Site.name), alpha = 0.7, size = 4) +
geom_polygon(aes(fill=Site.name), alpha=0.5) +
geom_point(aes(colour = Site.name), size = 1.5)+
scale_color_manual(values = colorRampPalette(brewer.pal(12, "Paired"))(13)) +
scale_fill_manual(values = colorRampPalette(brewer.pal(12, "Paired"))(13)) +
theme(legend.position = "none")
arrowmat_CAP_norm <- vegan::scores(CAP_ps_cap_r_norm, display = "bp")
arrowmat_CAP_norm<-subset(arrowmat_CAP_norm,
rownames(arrowmat_CAP_norm) %in% c("Temp.C","EC.uS.cm","Site.ID","K.µg.ml","Na.µg.ml"))
arrowdf_CAP_norm <- data.frame(labels = rownames(arrowmat_CAP_norm), arrowmat_CAP_norm)
rownames(arrowdf_CAP_norm) <- c("Site ID","Temperature","EC","K","Na")
arrowdf_CAP_norm$labels <- c("Site ID","Temperature","EC","K","Na")
arrow_map_CAP_norm <- aes(xend = CAP1, yend = CAP2,
x = 0, y = 0,
shape = NULL, color = NULL,
label = labels)
label_map_CAP_norm <- aes(x = 1.1 * CAP1, y = 1.3 * CAP2,
shape = NULL, color = NULL,
label = labels)
arrowhead_CAP_norm = arrow(length = unit(0.02, "npc"))
CAP_norm_sig_plot_a12 = CAP_norm_sig_plot_a12 +
geom_segment(mapping = arrow_map_CAP_norm, size = .5,
data = arrowdf_CAP_norm, color = "gray", arrow = arrowhead_CAP_norm) +
geom_text(mapping = label_map_CAP_norm, size = 4,
data = arrowdf_CAP_norm, show.legend = FALSE)
arrowmat_CAP_norm_13 <- vegan::scores(CAP_ps_cap_r_norm, display = "bp", choices=c(1,3))
arrowmat_CAP_norm_13<-subset(arrowmat_CAP_norm_13,
rownames(arrowmat_CAP_norm_13) %in% c("Temp.C","EC.uS.cm","Site.ID","K.µg.ml","Na.µg.ml"))
arrowdf_CAP_norm_13 <- data.frame(labels = rownames(arrowmat_CAP_norm_13), arrowmat_CAP_norm_13)
rownames(arrowdf_CAP_norm_13) <- c("Site ID","Temperature","EC","K","Na")
arrowdf_CAP_norm_13$labels <- c("Site ID","Temperature","EC","K","Na")
arrow_map_CAP_norm_13 <- aes(xend = CAP1, yend = CAP3,
x = 0, y = 0,
shape = NULL, color = NULL,
label = labels)
label_map_CAP_norm_13 <- aes(x = 1.1 * CAP1, y = 1.3 * CAP3,
shape = NULL, color = NULL,
label = labels)
arrowhead_CAP_norm_13 = arrow(length = unit(0.02, "npc"))
CAP_norm_sig_plot_a13 <- plot_ordination(ps_cap_r_norm,
ordination = CAP_ps_cap_r_norm, color = "Site.name",axes = c(1,3)) +
aes(shape = Month_cat) +
geom_point(aes(colour = Site.name), alpha = 0.7, size = 4) +
geom_polygon(aes(fill=Site.name), alpha=0.5) +
geom_point(aes(colour = Site.name), size = 1.5) +
geom_segment(mapping = arrow_map_CAP_norm_13, size = .5,
data = arrowdf_CAP_norm_13, color = "gray", arrow = arrowhead_CAP_norm_13) +
geom_text(mapping = label_map_CAP_norm_13, size = 4,
data = arrowdf_CAP_norm_13, show.legend = FALSE)+
scale_color_manual(values = colorRampPalette(brewer.pal(12, "Paired"))(13)) +
scale_fill_manual(values = colorRampPalette(brewer.pal(12, "Paired"))(13))
ps.b.r.CAP_norm_sig.plot<-plot_grid(CAP_norm_sig_plot_a12, CAP_norm_sig_plot_a13,
labels=c("A","B"))
ps.b.r.CAP_norm_sig.plot
So, as previously seen, two major groups composed of high-S and high-Fe samples. Temperature seems to play a role in defining high-S sample groups, but this changes throughout the year.
Is this ordination’s goodness of fit better than random models? i.e. are the constraints in this model significant?
anova(CAP_ps_cap_r_norm, perm=1e3)
## Permutation test for capscale under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: capscale(formula = distance ~ Site.ID + Temp.C + EC.uS.cm + DO..sat + K.µg.ml + Na.µg.ml + Cl..µg.ml + SO4..µg.ml + X6.Li.ng.ml + X7.Li.ng.ml + X7.Li..He..ng.ml + X11.B.ng.ml + X45.Sc.ng.ml + X55.Mn.ng.ml + X59.Co.ng.ml + X60.Ni.ng.ml + X69.Ga.ng.ml + X75.As..He..ng.ml + X88.Sr.ng.ml + X89.Y.ng.ml + X133.Cs.ng.ml + X139.La.ng.ml + X238.U.ng.ml, data = data, k = 3, trymax = 1000, weighted = TRUE)
## Df SumOfSqs F Pr(>F)
## Model 23 17.5620 22.335 0.001 ***
## Residual 93 3.1794
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
As before, but now what microbial groups are influenced by these variables? More ordination on the way.
ps_cap_r_norm_april = subset_samples(ps_cap_r_norm, Month_cat == "April")
ps_cap_r_norm_april_f = filter_taxa(ps_cap_r_norm_april, function(x) sum(x) > .01, TRUE)
ps_cap_r_norm_august = subset_samples(ps_cap_r_norm, Month_cat == "August")
ps_cap_r_norm_august_f = filter_taxa(ps_cap_r_norm_august, function(x) sum(x) > .01, TRUE)
ps_cap_r_norm_december = subset_samples(ps_cap_r_norm, Month_cat == "December")
ps_cap_r_norm_december_f = filter_taxa(ps_cap_r_norm_december, function(x) sum(x) > .01, TRUE)
CAP_norm_ps_cap_r_norm_april_f <- ordinate(ps_cap_r_norm_april_f,
method = "CAP",
distance = "jsd",
k=3, trymax=1e3, weighted=TRUE,
formula = ~ Site.ID + Temp.C + EC.uS.cm + DO..sat + K.µg.ml + Na.µg.ml + Cl..µg.ml + SO4..µg.ml + X6.Li.ng.ml + X7.Li.ng.ml + X7.Li..He..ng.ml + X11.B.ng.ml + X45.Sc.ng.ml + X55.Mn.ng.ml + X59.Co.ng.ml + X60.Ni.ng.ml + X69.Ga.ng.ml + X75.As..He..ng.ml + X88.Sr.ng.ml + X89.Y.ng.ml + X133.Cs.ng.ml + X139.La.ng.ml + X238.U.ng.ml)
CAP_norm_ps_cap_r_norm_august_f <- ordinate(ps_cap_r_norm_august_f,
method = "CAP",
distance = "jsd",
k=3, trymax=1e3, weighted=TRUE,
formula = ~ Site.ID + Temp.C + EC.uS.cm + DO..sat + K.µg.ml + Na.µg.ml + Cl..µg.ml + SO4..µg.ml + X6.Li.ng.ml + X7.Li.ng.ml + X7.Li..He..ng.ml + X11.B.ng.ml + X45.Sc.ng.ml + X55.Mn.ng.ml + X59.Co.ng.ml + X60.Ni.ng.ml + X69.Ga.ng.ml + X75.As..He..ng.ml + X88.Sr.ng.ml + X89.Y.ng.ml + X133.Cs.ng.ml + X139.La.ng.ml + X238.U.ng.ml)
CAP_norm_ps_cap_r_norm_december_f <- ordinate(ps_cap_r_norm_december_f,
method = "CAP",
distance = "jsd",
k=3, trymax=1e3, weighted=TRUE,
formula = ~ Site.ID + Temp.C + EC.uS.cm + DO..sat + K.µg.ml + Na.µg.ml + Cl..µg.ml + SO4..µg.ml + X6.Li.ng.ml + X7.Li.ng.ml + X7.Li..He..ng.ml + X11.B.ng.ml + X45.Sc.ng.ml + X55.Mn.ng.ml + X59.Co.ng.ml + X60.Ni.ng.ml + X69.Ga.ng.ml + X75.As..He..ng.ml + X88.Sr.ng.ml + X89.Y.ng.ml + X133.Cs.ng.ml + X139.La.ng.ml + X238.U.ng.ml)
#april
arrowmat_CAP_norm_taxa_april <- vegan::scores(CAP_norm_ps_cap_r_norm_april_f, display = "bp")
arrowmat_CAP_norm_taxa_april<-subset(arrowmat_CAP_norm_taxa_april,
rownames(arrowmat_CAP_norm_taxa_april) %in% c("Temp.C","EC.uS.cm","Site.ID","K.µg.ml","Na.µg.ml"))
arrowdf_CAP_norm_taxa_april <- data.frame(labels = rownames(arrowmat_CAP_norm_taxa_april), arrowmat_CAP_norm_taxa_april)
rownames(arrowmat_CAP_norm_taxa_april) <- c("Site ID","Temperature","EC","K","Na")
rownames(arrowdf_CAP_norm_taxa_april) <- c("Site ID","Temperature","EC","K","Na")
arrowdf_CAP_norm_taxa_april$labels <- c("Site ID","Temperature","EC","K","Na")
arrow_map_CAP_norm_taxa_april <- aes(xend = CAP1, yend = CAP2,
x = 0, y = 0,
shape = NULL, color = NULL,
label = labels)
label_map_CAP_norm_taxa_april <- aes(x = 1.1 * CAP1, y = 1.1 * CAP2,
shape = NULL, color = NULL,
label = labels)
arrowhead_CAP_norm_taxa_april = arrow(length = unit(0.02, "npc"))
april_phyla=nrow(as.data.frame(table(tax_table(ps_cap_r_norm_april_f)[, "Phylum"], exclude = NULL)))
CAP_labels_april <- plot_ordination(ps_cap_r_norm_april_f,
ordination = CAP_norm_ps_cap_r_norm_april_f,
axes = c(1,2), type="taxa")
CAP_april_xlabel <- CAP_labels_april$labels$x
CAP_april_ylabel <- CAP_labels_april$labels$y
CAP_norm_april_taxa_sig_plot_a12 <- plot_ordination(ps_cap_r_norm_april_f,
ordination = CAP_norm_ps_cap_r_norm_april_f,
axes = c(1,2), type="taxa", justDF = TRUE)
CAP_norm_april_taxa_sig_plot_a12$Phylum=as.character(CAP_norm_april_taxa_sig_plot_a12$Phylum)
CAP_norm_april_taxa_sig_plot_a12$Class=as.character(CAP_norm_april_taxa_sig_plot_a12$Class)
ov_CAP_norm_april_taxa_sig_plot_a12<-subset(CAP_norm_april_taxa_sig_plot_a12,
Phylum != "Proteobacteria")
ov_CAP_norm_april_taxa_sig_plot_a12$Phylum=as.character(ov_CAP_norm_april_taxa_sig_plot_a12$Phylum)
class_ov_CAP_norm_april_taxa_sig_plot_a12<-subset(CAP_norm_april_taxa_sig_plot_a12,
Class %in% c("Epsilonproteobacteria","Betaproteobacteria","Gammaproteobacteria","Deltaproteobacteria"))
class_ov_CAP_norm_april_taxa_sig_plot_a12$Class=as.character(class_ov_CAP_norm_april_taxa_sig_plot_a12$Class)
april_class <- length(unique(class_ov_CAP_norm_april_taxa_sig_plot_a12$Class))
final_CAP_norm_april_taxa_sig_plot_a12 = ggplot(data=CAP_norm_april_taxa_sig_plot_a12,
aes(x=CAP1,y=CAP2)) +
geom_point(data=ov_CAP_norm_april_taxa_sig_plot_a12,
aes(x=CAP1, y=CAP2,
color = Phylum, size = 4)) +
geom_point(data = class_ov_CAP_norm_april_taxa_sig_plot_a12,
aes(x=CAP1, y=CAP2,
color = Phylum,
shape = Class,
alpha = 1, size = 4)) +
geom_segment(mapping = arrow_map_CAP_norm_taxa_april, size = .5,
data = arrowdf_CAP_norm_taxa_april,
color = "black", arrow = arrowhead_CAP_norm_taxa_april) +
geom_text(mapping = label_map_CAP_norm_taxa_april,
data = subset(arrowdf_CAP_norm_taxa_april,
labels %in% c("Site ID", "Temperature","K","Na")),
size = 5, fontface="bold",
hjust = 0,
show.legend = FALSE) +
geom_text(mapping = label_map_CAP_norm_taxa_april,
data = subset(arrowdf_CAP_norm_taxa_april,
labels %in% c("EC")),
size = 5, fontface="bold",
show.legend = FALSE,
nudge_y = 0.05) +
scale_color_manual(values=distinctColorPalette(length(unique(CAP_norm_april_taxa_sig_plot_a12$Phylum))),
na.value = "black") +
scale_fill_manual(values=distinctColorPalette(length(unique(CAP_norm_april_taxa_sig_plot_a12$Phylum))),
na.value = "black") +
scale_shape_manual(values=c(15, 17, 18, 4)) +
guides(fill = FALSE,
size = FALSE,
alpha = FALSE,
color = guide_legend(nrow = april_phyla/2),
shape = guide_legend(nrow = april_phyla/2)) +
labs(subtitle = "Using all SVs above 1% relative abundance",
title= "April",
x = CAP_april_xlabel,
y = CAP_april_ylabel) +
theme(plot.title = element_text(hjust = 0.5),
legend.text = element_text(size = 12),
legend.title = element_text(size = 13, face = "bold"),
legend.position = "right",
legend.direction = "vertical",
legend.box.margin = margin(100, 6, 6, 6))
#august
arrowmat_CAP_norm_taxa_august <- vegan::scores(CAP_norm_ps_cap_r_norm_august_f, display = "bp")
arrowmat_CAP_norm_taxa_august<-subset(arrowmat_CAP_norm_taxa_august,
rownames(arrowmat_CAP_norm_taxa_august) %in% c("Temp.C","EC.uS.cm","Site.ID","K.µg.ml","Na.µg.ml"))
arrowdf_CAP_norm_taxa_august <- data.frame(labels = rownames(arrowmat_CAP_norm_taxa_august), arrowmat_CAP_norm_taxa_august)
rownames(arrowmat_CAP_norm_taxa_august) <- c("Site ID","Temperature","EC","K","Na")
rownames(arrowdf_CAP_norm_taxa_august) <- c("Site ID","Temperature","EC","K","Na")
arrowdf_CAP_norm_taxa_august$labels <- c("Site ID","Temperature","EC","K","Na")
arrow_map_CAP_norm_taxa_august <- aes(xend = CAP1, yend = CAP2,
x = 0, y = 0,
shape = NULL, color = NULL,
label = labels)
label_map_CAP_norm_taxa_august <- aes(x = 1.1 * CAP1, y = 1.1 * CAP2,
shape = NULL, color = NULL,
label = labels)
arrowhead_CAP_norm_taxa_august = arrow(length = unit(0.02, "npc"))
august_phyla=nrow(as.data.frame(table(tax_table(ps_cap_r_norm_august_f)[, "Phylum"], exclude = NULL)))
CAP_labels_august <- plot_ordination(ps_cap_r_norm_august_f,
ordination = CAP_norm_ps_cap_r_norm_august_f,
axes = c(1,2), type="taxa")
CAP_august_xlabel <- CAP_labels_august$labels$x
CAP_august_ylabel <- CAP_labels_august$labels$y
CAP_norm_august_taxa_sig_plot_a12 <- plot_ordination(ps_cap_r_norm_august_f,
ordination = CAP_norm_ps_cap_r_norm_august_f,
axes = c(1,2), type="taxa", justDF = TRUE)
CAP_norm_august_taxa_sig_plot_a12$Phylum=as.character(CAP_norm_august_taxa_sig_plot_a12$Phylum)
CAP_norm_august_taxa_sig_plot_a12$Class=as.character(CAP_norm_august_taxa_sig_plot_a12$Class)
ov_CAP_norm_august_taxa_sig_plot_a12<-subset(CAP_norm_august_taxa_sig_plot_a12,
Phylum != "Proteobacteria")
ov_CAP_norm_august_taxa_sig_plot_a12$Phylum=as.character(ov_CAP_norm_august_taxa_sig_plot_a12$Phylum)
class_ov_CAP_norm_august_taxa_sig_plot_a12<-subset(CAP_norm_august_taxa_sig_plot_a12,
Class %in% c("Epsilonproteobacteria","Betaproteobacteria","Gammaproteobacteria","Deltaproteobacteria"))
class_ov_CAP_norm_august_taxa_sig_plot_a12$Class=as.character(class_ov_CAP_norm_august_taxa_sig_plot_a12$Class)
august_class <- length(unique(class_ov_CAP_norm_august_taxa_sig_plot_a12$Class))
final_CAP_norm_august_taxa_sig_plot_a12 = ggplot(data=CAP_norm_august_taxa_sig_plot_a12,
aes(x=CAP1,y=CAP2)) +
geom_point(data=ov_CAP_norm_august_taxa_sig_plot_a12,
aes(x=CAP1, y=CAP2,
color = Phylum, size = 4)) +
geom_point(data = class_ov_CAP_norm_august_taxa_sig_plot_a12,
aes(x=CAP1, y=CAP2,
color = Phylum,
shape = Class,
alpha = 1, size = 4)) +
geom_segment(mapping = arrow_map_CAP_norm_taxa_august, size = .5,
data = arrowdf_CAP_norm_taxa_august,
color = "black", arrow = arrowhead_CAP_norm_taxa_august) +
geom_text(mapping = label_map_CAP_norm_taxa_august,
data = subset(arrowdf_CAP_norm_taxa_august,
labels %in% c("Site ID", "Temperature","EC","Na")),
size = 5, fontface="bold",
hjust = 0,
show.legend = FALSE) +
geom_text(mapping = label_map_CAP_norm_taxa_august,
data = subset(arrowdf_CAP_norm_taxa_august,
labels %in% c("K")),
size = 5, fontface="bold",
show.legend = FALSE,
nudge_y = 0.05) +
scale_color_manual(values=distinctColorPalette(length(unique(CAP_norm_august_taxa_sig_plot_a12$Phylum))),
na.value = "black") +
scale_fill_manual(values=distinctColorPalette(length(unique(CAP_norm_august_taxa_sig_plot_a12$Phylum))),
na.value = "black") +
scale_shape_manual(values=c(15, 17, 18, 4)) +
guides(fill = FALSE,
color = guide_legend(nrow= august_phyla/2),
size = FALSE,
alpha = FALSE,
shape = guide_legend(nrow = august_phyla/2)) +
labs(title= "August",
x = CAP_august_xlabel,
y = CAP_august_ylabel) +
theme(plot.title = element_text(hjust = 0.5),
legend.text = element_text(size = 12),
legend.title = element_text(size = 13, face = "bold"),
legend.position = "right",
legend.direction = "vertical",
legend.box.margin = margin(100, 6, 6, 6))
#december
arrowmat_CAP_norm_taxa_december <- vegan::scores(CAP_norm_ps_cap_r_norm_december_f, display = "bp")
arrowmat_CAP_norm_taxa_december<-subset(arrowmat_CAP_norm_taxa_december,
rownames(arrowmat_CAP_norm_taxa_december) %in% c("Temp.C","EC.uS.cm","Site.ID","K.µg.ml","Na.µg.ml"))
arrowdf_CAP_norm_taxa_december <- data.frame(labels = rownames(arrowmat_CAP_norm_taxa_december), arrowmat_CAP_norm_taxa_december)
rownames(arrowmat_CAP_norm_taxa_december) <- c("Site ID","Temperature","EC","K","Na")
rownames(arrowdf_CAP_norm_taxa_december) <- c("Site ID","Temperature","EC","K","Na")
arrowdf_CAP_norm_taxa_december$labels <- c("Site ID","Temperature","EC","K","Na")
arrow_map_CAP_norm_taxa_december <- aes(xend = CAP1, yend = CAP2,
x = 0, y = 0,
shape = NULL, color = NULL,
label = labels)
label_map_CAP_norm_taxa_december <- aes(x = 1.1 * CAP1, y = 1.1 * CAP2,
shape = NULL, color = NULL,
label = labels)
arrowhead_CAP_norm_taxa_december = arrow(length = unit(0.02, "npc"))
december_phyla=nrow(as.data.frame(table(tax_table(ps_cap_r_norm_december_f)[, "Phylum"], exclude = NULL)))
CAP_labels_december <- plot_ordination(ps_cap_r_norm_december_f,
ordination = CAP_norm_ps_cap_r_norm_december_f,
axes = c(1,2), type="taxa")
CAP_december_xlabel <- CAP_labels_december$labels$x
CAP_december_ylabel <- CAP_labels_december$labels$y
CAP_norm_december_taxa_sig_plot_a12 <- plot_ordination(ps_cap_r_norm_december_f,
ordination = CAP_norm_ps_cap_r_norm_december_f,
axes = c(1,2), type="taxa", justDF = TRUE)
CAP_norm_december_taxa_sig_plot_a12$Phylum=as.character(CAP_norm_december_taxa_sig_plot_a12$Phylum)
CAP_norm_december_taxa_sig_plot_a12$Class=as.character(CAP_norm_december_taxa_sig_plot_a12$Class)
ov_CAP_norm_december_taxa_sig_plot_a12<-subset(CAP_norm_december_taxa_sig_plot_a12,
Phylum != "Proteobacteria")
ov_CAP_norm_december_taxa_sig_plot_a12$Phylum=as.character(ov_CAP_norm_december_taxa_sig_plot_a12$Phylum)
class_ov_CAP_norm_december_taxa_sig_plot_a12<-subset(CAP_norm_december_taxa_sig_plot_a12,
Class %in% c("Epsilonproteobacteria","Betaproteobacteria","Gammaproteobacteria","Deltaproteobacteria"))
class_ov_CAP_norm_december_taxa_sig_plot_a12$Class=as.character(class_ov_CAP_norm_december_taxa_sig_plot_a12$Class)
december_class <- length(unique(class_ov_CAP_norm_december_taxa_sig_plot_a12$Class))
final_CAP_norm_december_taxa_sig_plot_a12 = ggplot(data=CAP_norm_december_taxa_sig_plot_a12,
aes(x=CAP1,y=CAP2)) +
geom_point(data=ov_CAP_norm_december_taxa_sig_plot_a12,
aes(x=CAP1, y=CAP2,
color = Phylum, size = 4)) +
geom_point(data = class_ov_CAP_norm_december_taxa_sig_plot_a12,
aes(x=CAP1, y=CAP2,
color = Phylum,
shape = Class,
alpha = 1, size = 4)) +
geom_segment(mapping = arrow_map_CAP_norm_taxa_december, size = .5,
data = arrowdf_CAP_norm_taxa_december,
color = "black", arrow = arrowhead_CAP_norm_taxa_december) +
geom_text(mapping = label_map_CAP_norm_taxa_december,
data = arrowdf_CAP_norm_taxa_december,
size = 5, fontface="bold",
show.legend = FALSE) +
scale_color_manual(values=distinctColorPalette(length(unique(CAP_norm_december_taxa_sig_plot_a12$Phylum))),
na.value = "black") +
scale_fill_manual(values=distinctColorPalette(length(unique(CAP_norm_december_taxa_sig_plot_a12$Phylum))),
na.value = "black") +
scale_shape_manual(values=c(15, 17, 18, 4)) +
guides(fill = FALSE,
color = guide_legend(nrow = december_phyla/2),
size = FALSE,
alpha = FALSE,
shape = guide_legend(nrow = december_phyla/2)) +
labs(title= "December",
x = CAP_december_xlabel,
y = CAP_december_ylabel) +
theme(plot.title = element_text(hjust = 0.5),
legend.text = element_text(size = 12),
legend.title = element_text(size = 13, face = "bold"),
legend.position = "right",
legend.direction = "vertical",
legend.box.margin = margin(100, 6, 6, 6))
ps_cap_r_norm_Month_cat<-plot_grid(final_CAP_norm_april_taxa_sig_plot_a12,
final_CAP_norm_august_taxa_sig_plot_a12,
final_CAP_norm_december_taxa_sig_plot_a12,
align = 'v', ncol = 1)
It seems K, EC, Na and temperature influenced a subset of the microbial community mostly composed of Gamma- and Epsilonproteobacteria as well as Omnitrophica plus others across the year. Omintrophica and Parcubacteria seem to appear in different numbers across the year within this cluster.
So who defines this cluster? Let’s select the corresponding points of the CAP and figure out.
CAP_norm_cluster_april = subset(final_CAP_norm_april_taxa_sig_plot_a12$data,
CAP1 >= 0.75)
CAP_norm_cluster_august = subset(final_CAP_norm_august_taxa_sig_plot_a12$data,
CAP1 >= 0.75)
CAP_norm_cluster_december = subset(final_CAP_norm_december_taxa_sig_plot_a12$data,
CAP1 >= 0.75)
CAP_norm_cluster_april_names <- rownames(CAP_norm_cluster_april)
CAP_norm_cluster_august_names <- rownames(CAP_norm_cluster_august)
CAP_norm_cluster_december_names <- rownames(CAP_norm_cluster_december)
ps.b.r_april = subset_samples(ps.b.r, Month == "April")
ps.b.r_april_f = filter_taxa(ps.b.r_april, function(x) sum(x) > .01, TRUE)
ps.b.r_august = subset_samples(ps.b.r, Month == "August")
ps.b.r_august_f = filter_taxa(ps.b.r_august, function(x) sum(x) > .01, TRUE)
ps.b.r_december = subset_samples(ps.b.r, Month == "December")
ps.b.r_december_f = filter_taxa(ps.b.r_december, function(x) sum(x) > .01, TRUE)
ps.b.r_april_CAPcluster_taxa <- prune_taxa(CAP_norm_cluster_april_names, ps.b.r_april_f)
ps.b.r_august_CAPcluster_taxa <- prune_taxa(CAP_norm_cluster_august_names, ps.b.r_august_f)
ps.b.r_december_CAPcluster_taxa <- prune_taxa(CAP_norm_cluster_december_names, ps.b.r_december_f)
What are the Proteobacteria-to-other-phyla proportions here?
sv.phyl.p.april<-as.data.frame(table(tax_table(ps.b.r_april_CAPcluster_taxa)[, "Phylum"], exclude = NULL))
sv.phyl.p.april.ord <- sv.phyl.p.april[order(-sv.phyl.p.april$Freq),]
head(sv.phyl.p.april.ord, n=10)
## Var1 Freq
## 11 Proteobacteria 39
## 9 Omnitrophica 5
## 5 Cyanobacteria 2
## 7 Euryarchaeota 2
## 1 Acetothermia 1
## 2 Candidatus_Berkelbacteria 1
## 3 Chlorobi 1
## 4 Chloroflexi 1
## 6 Elusimicrobia 1
## 8 Nitrospirae 1
sv.phyl.p.august<-as.data.frame(table(tax_table(ps.b.r_august_CAPcluster_taxa)[, "Phylum"], exclude = NULL))
sv.phyl.p.august.ord <- sv.phyl.p.august[order(-sv.phyl.p.august$Freq),]
head(sv.phyl.p.august.ord, n=10)
## Var1 Freq
## 10 Proteobacteria 38
## 9 Parcubacteria 4
## 2 Cyanobacteria 2
## 8 Omnitrophica 2
## 1 Bacteroidetes 1
## 3 FCPU426 1
## 4 Firmicutes 1
## 5 Gemmatimonadetes 1
## 6 Microgenomates 1
## 7 Nitrospirae 1
sv.phyl.p.december<-as.data.frame(table(tax_table(ps.b.r_december_CAPcluster_taxa)[, "Phylum"], exclude = NULL))
sv.phyl.p.december.ord <- sv.phyl.p.december[order(-sv.phyl.p.december$Freq),]
head(sv.phyl.p.december.ord, n=10)
## Var1 Freq
## 9 Proteobacteria 32
## 8 Parcubacteria 4
## 7 Omnitrophica 3
## 6 Nitrospirae 2
## 11 WWE3 2
## 1 Chloroflexi 1
## 2 Cloacimonetes 1
## 3 Euryarchaeota 1
## 4 Firmicutes 1
## 5 Latescibacteria 1
ps.b.r_april_CAPcluster_taxa.glom <- tax_glom(ps.b.r_april_CAPcluster_taxa, taxrank = 'Genus', NArm = FALSE)
ps.b.r_april_CAPcluster_taxa.glom.psdf <- data.table(psmelt(ps.b.r_april_CAPcluster_taxa.glom))
ps.b.r_april_CAPcluster_taxa.glom.psdf$Phylum <- as.character(ps.b.r_april_CAPcluster_taxa.glom.psdf$Phylum)
ps.b.r_april_CAPcluster_taxa.glom.psdf$Class <- as.character(ps.b.r_april_CAPcluster_taxa.glom.psdf$Class)
ps.b.r_april_CAPcluster_taxa.glom.psdf$Genus <- as.character(ps.b.r_april_CAPcluster_taxa.glom.psdf$Genus)
ps.b.r_april_CAPcluster_taxa.plot<-ggplot(ps.b.r_april_CAPcluster_taxa.glom.psdf[Abundance > 0],
aes(x = Site.name, y = Abundance/3, fill = Genus)) +
geom_bar(stat = "identity") +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1)) +
guides(fill = guide_legend(keywidth = 1, keyheight = 1)) +
ylab("Relative Abundance\n") +
scale_fill_manual(values=distinctColorPalette(length(unique(ps.b.r_april_CAPcluster_taxa.glom.psdf$Genus))),
na.value = "black")
ps.b.r_august_CAPcluster_taxa.glom <- tax_glom(ps.b.r_august_CAPcluster_taxa, taxrank = 'Genus', NArm = FALSE)
ps.b.r_august_CAPcluster_taxa.glom.psdf <- data.table(psmelt(ps.b.r_august_CAPcluster_taxa.glom))
ps.b.r_august_CAPcluster_taxa.glom.psdf$Phylum <- as.character(ps.b.r_august_CAPcluster_taxa.glom.psdf$Phylum)
ps.b.r_august_CAPcluster_taxa.glom.psdf$Class <- as.character(ps.b.r_august_CAPcluster_taxa.glom.psdf$Class)
ps.b.r_august_CAPcluster_taxa.glom.psdf$Genus <- as.character(ps.b.r_august_CAPcluster_taxa.glom.psdf$Genus)
ps.b.r_august_CAPcluster_taxa.plot<-ggplot(ps.b.r_august_CAPcluster_taxa.glom.psdf[Abundance > 0],
aes(x = Site.name, y = Abundance/3, fill = Genus)) +
geom_bar(stat = "identity") +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1)) +
guides(fill = guide_legend(keywidth = 1, keyheight = 1)) +
ylab("Relative Abundance\n") +
scale_fill_manual(values=distinctColorPalette(length(unique(ps.b.r_august_CAPcluster_taxa.glom.psdf$Genus))),
na.value = "black")
ps.b.r_december_CAPcluster_taxa.glom <- tax_glom(ps.b.r_december_CAPcluster_taxa, taxrank = 'Genus', NArm = FALSE)
ps.b.r_december_CAPcluster_taxa.glom.psdf <- data.table(psmelt(ps.b.r_december_CAPcluster_taxa.glom))
ps.b.r_december_CAPcluster_taxa.glom.psdf$Phylum <- as.character(ps.b.r_december_CAPcluster_taxa.glom.psdf$Phylum)
ps.b.r_december_CAPcluster_taxa.glom.psdf$Class <- as.character(ps.b.r_december_CAPcluster_taxa.glom.psdf$Class)
ps.b.r_december_CAPcluster_taxa.glom.psdf$Genus <- as.character(ps.b.r_december_CAPcluster_taxa.glom.psdf$Genus)
ps.b.r_december_CAPcluster_taxa.plot<-ggplot(ps.b.r_december_CAPcluster_taxa.glom.psdf[Abundance > 0],
aes(x = Site.name, y = Abundance/3, fill = Genus)) +
geom_bar(stat = "identity") +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1)) +
guides(fill = guide_legend(keywidth = 1, keyheight = 1)) +
ylab("Relative Abundance\n") +
scale_fill_manual(values=distinctColorPalette(length(unique(ps.b.r_december_CAPcluster_taxa.glom.psdf$Genus))),
na.value = "black")
ps.b.r_CAPcluster_taxa_plot <- plot_grid(ps.b.r_april_CAPcluster_taxa.plot,
ps.b.r_august_CAPcluster_taxa.plot,
ps.b.r_december_CAPcluster_taxa.plot,
align = 'v', ncol = 1)
ps.b.r_CAPcluster_taxa_plot
As expected, the genus-level analysis shows that the main Epsilonproteobacterial players are the ones modulating this shift showed by the CAP. Further, the Gammaproteobacteria noticed in that shifted cluster are represented by Thiotrix. In April, Omnitrophica follows Proteobacteria in SV presence, whilst in August and December it’s Parcubacteria, both of these in numbers ~8x smaller than Proteobacterial SVs.
Importantly, this cluster is, albeit not in huge amounts, driven by temperature, K, EC and Na, bit less in December. What can this mean mostly for these environmental variables? I can see how temperature might impact given that in Crumlin, Celynen and Six Bells are amongst the warmest sites. The first two also present the highest EC values in the same figure.
Ok then, I have no idea what to do at this point, passing on to other stuff.